Create a Seurat object from one sample and evaluate quality control metrics Data is located at : /ourdisk/hpc/iicomicswshp/dont_archive/omics_workshop_2025/day2/data/
library(here)
here() starts at /Users/LHAYES5/Desktop/iicomics_2025
Note: update the “sample” to the KZ sample you are using
seurat <- Read10X(data.dir = "data/KZ2/filtered_feature_bc_matrix/")
seurat <- CreateSeuratObject(counts = seurat, project = "KZ2")
# samp group
# KZ1 Control
# KZ2 PKD
# KZ3 Control
# KZ4 PKD
# seurat$group <- "CON"
seurat$group <- "PKD"
## check out the object
seurat
An object of class Seurat
32285 features across 9249 samples within 1 assay
Active assay: RNA (32285 features, 0 variable features)
1 layer present: counts
## which sample did you try? how many genes? how many cells?
# Check for doublets using the package and function scDblFinder(), but the function does not take a Seurat object as an argument, but rather works with SingleCellExperiment objects. First we must convert then we can run the double detection
sce <- as.SingleCellExperiment(seurat)
Warning: Layer ‘data’ is empty
Warning: Layer ‘scale.data’ is empty
sce <- scDblFinder(sce)
Creating ~7400 artificial doublets...
Dimensional reduction
Evaluating kNN...
Training model...
iter=0, 1732 cells excluded from training.
iter=1, 1694 cells excluded from training.
iter=2, 1665 cells excluded from training.
Threshold found:0.467
886 (9.6%) doublets called
table(sce$scDblFinder.class)
singlet doublet
8363 886
# Add in quality control metrics to seurat object
seurat$doublet <- sce$scDblFinder.class
seurat <- PercentageFeatureSet(seurat, pattern = "^mt-", col.name = "percent.mt")
seurat <- PercentageFeatureSet(seurat, pattern = "^Rp[s/l]", col.name = "percent.rb")
VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), ncol = 2)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.
ggsave(here("plots/1.QC/raw_Vln.jpg"), width = 10, height = 10)
FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
ggsave(here("plots/1.QC/raw_countXfeat.jpg"), width = 5, height = 5)
FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "percent.mt")
ggsave(here("plots/1.QC/raw_countXmito.jpg"), width = 5, height = 5)
FeatureScatter(seurat, feature1 = "percent.rb", feature2 = "percent.mt")
ggsave(here("plots/1.QC/raw_riboXmito.jpg"), width = 5, height = 5)
FeatureScatter(seurat, feature1 = "percent.rb", feature2 = "nFeature_RNA")
ggsave(here("plots/1.QC/raw_riboXfeat.jpg"), width = 5, height = 5)
Fill in the “##”” with a values for quality filtering and then check the data
seurat[["QC"]] = ifelse(seurat$doublet == "doublet", "doublet", "Pass")
seurat[["QC"]] = ifelse(seurat$nFeature_RNA < 250 & seurat$QC == 'Pass', 'low_Feat', seurat@meta.data$QC)
seurat[["QC"]] = ifelse(seurat$percent.mt > 25 & seurat$QC == 'Pass', 'high_mt', seurat@meta.data$QC)
seurat[["QC"]] = ifelse(seurat$percent.rb < 1 & seurat$QC == 'Pass', "low_rb", seurat@meta.data$QC)
VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), group.by = "QC", ncol = 2)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.
ggsave(here("plots/1.QC/filt_vln.jpg"), width = 10, height = 10)
FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "QC")
ggsave(here("plots/1.QC/filt_countXfeat.jpg"), width = 5, height = 5)
FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "QC")
ggsave(here("plots/1.QC/filt_countXmito.jpg"), width = 5, height = 5)
FeatureScatter(seurat, feature1 = "percent.rb", feature2 = "percent.mt", group.by = "QC")
ggsave(here("plots/1.QC/filt_riboXmito.jpg"), width = 5, height = 5)
FeatureScatter(seurat, feature1 = "percent.rb", feature2 = "nFeature_RNA", group.by = "QC")
ggsave(here("plots/1.QC/filt_riboXfeat.jpg"), width = 5, height = 5)
seurat.filt <- subset(seurat, subset = QC =="Pass")
seurat.filt
An object of class Seurat
32285 features across 6175 samples within 1 assay
Active assay: RNA (32285 features, 0 variable features)
1 layer present: counts
# Which sample did you run? How many cells were filtered out? How many passed the filter? post the answer on canvas.
seurat.filt <- NormalizeData(seurat.filt)
Normalizing layer: counts
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
rlang::last_trace()
<error/rlang_error>
Error in `ggsave()`:
! Cannot find directory ]8;;file:///Users/LHAYES5/Desktop/iicomics_2025/scripts/plots/1.QCplots/1.QC]8;;.
ℹ Please supply an existing directory or use `create.dir = TRUE`.
---
Backtrace:
▆
1. └─ggplot2::ggsave("plots/1.QC/HVG_v1.jpg", width = 5, height = 5)
Run ]8;;x-r-run:rlang::last_trace(drop = FALSE)rlang::last_trace(drop = FALSE)]8;; to see 3 hidden frames.
seurat.filt <- FindVariableFeatures(seurat.filt, nfeatures = 3000)
Finding variable features for layer counts
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
seurat.filt <- ScaleData(seurat.filt)
Centering and scaling data matrix
|
| | 0%
|
|================================================ | 33%
|
|================================================================================================= | 67%
|
|=================================================================================================================================================| 100%
VariableFeaturePlot(seurat.filt)
Warning in scale_x_log10() :
log-10 transformation introduced infinite values.
ggsave(here("plots/1.QC/HVG_v1.jpg"), width = 5, height = 5)
Warning in scale_x_log10() :
log-10 transformation introduced infinite values.
## Plot variable features with labels
top10 <- head(x = VariableFeatures(seurat.filt), 10)
plot1 <- VariableFeaturePlot(object = seurat.filt)
LabelPoints(plot = plot1, points = top10, repel = TRUE)
When using repel, set xnudge and ynudge to 0 for optimal results
Warning in scale_x_log10() :
log-10 transformation introduced infinite values.
ggsave(here("plots/1.QC/HVG_v2.jpg"), width = 5, height = 5)
Warning in scale_x_log10() :
log-10 transformation introduced infinite values.
## List 3 genes that are highly variable.
save(seurat.filt, file = "data/KZ2_object.rds")
sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.5
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/Chicago
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] here_1.0.1 scDblFinder_1.18.0 SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0 Biobase_2.64.0
[6] GenomicRanges_1.56.2 GenomeInfoDb_1.40.1 IRanges_2.38.1 S4Vectors_0.42.1 BiocGenerics_0.50.0
[11] MatrixGenerics_1.16.0 matrixStats_1.5.0 ggplot2_3.5.2 Seurat_5.2.1 SeuratObject_5.0.2
[16] sp_2.1-4
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.22 splines_4.4.1 later_1.4.2 BiocIO_1.14.0 bitops_1.0-9
[6] R.oo_1.27.0 tibble_3.2.1 polyclip_1.10-7 XML_3.99-0.18 fastDummies_1.7.5
[11] lifecycle_1.0.4 rprojroot_2.0.4 edgeR_4.2.2 globals_0.18.0 lattice_0.22-6
[16] MASS_7.3-64 magrittr_2.0.3 sass_0.4.10 rmarkdown_2.29 limma_3.60.6
[21] plotly_4.10.4 jquerylib_0.1.4 yaml_2.3.10 metapod_1.12.0 httpuv_1.6.16
[26] sctransform_0.4.1 spam_2.11-1 spatstat.sparse_3.1-0 reticulate_1.42.0 cowplot_1.1.3
[31] pbapply_1.7-2 RColorBrewer_1.1-3 abind_1.4-8 zlibbioc_1.50.0 Rtsne_0.17
[36] R.utils_2.12.3 purrr_1.0.4 RCurl_1.98-1.16 GenomeInfoDbData_1.2.12 ggrepel_0.9.6
[41] irlba_2.3.5.1 listenv_0.9.1 spatstat.utils_3.1-2 goftest_1.2-3 RSpectra_0.16-2
[46] dqrng_0.4.1 spatstat.random_3.3-2 fitdistrplus_1.2-2 parallelly_1.45.0 DelayedMatrixStats_1.26.0
[51] codetools_0.2-20 DelayedArray_0.30.1 scuttle_1.14.0 tidyselect_1.2.1 UCSC.utils_1.0.0
[56] farver_2.1.2 viridis_0.6.5 ScaledMatrix_1.12.0 spatstat.explore_3.3-4 GenomicAlignments_1.40.0
[61] jsonlite_2.0.0 BiocNeighbors_1.22.0 progressr_0.15.1 ggridges_0.5.6 survival_3.8-3
[66] scater_1.32.1 systemfonts_1.2.1 tools_4.4.1 ragg_1.3.3 ica_1.0-3
[71] Rcpp_1.0.14 glue_1.8.0 gridExtra_2.3 SparseArray_1.4.8 xfun_0.52
[76] dplyr_1.1.4 withr_3.0.2 fastmap_1.2.0 bluster_1.14.0 digest_0.6.37
[81] rsvd_1.0.5 R6_2.6.1 mime_0.13 textshaping_1.0.0 colorspace_2.1-1
[86] scattermore_1.2 tensor_1.5 spatstat.data_3.1-4 R.methodsS3_1.8.2 tidyr_1.3.1
[91] generics_0.1.3 data.table_1.17.0 rtracklayer_1.64.0 httr_1.4.7 htmlwidgets_1.6.4
[96] S4Arrays_1.4.1 uwot_0.2.2 pkgconfig_2.0.3 gtable_0.3.6 lmtest_0.9-40
[101] XVector_0.44.0 htmltools_0.5.8.1 dotCall64_1.2 scales_1.3.0 png_0.1-8
[106] spatstat.univar_3.1-1 scran_1.32.0 knitr_1.50 rstudioapi_0.17.1 reshape2_1.4.4
[111] rjson_0.2.23 nlme_3.1-167 curl_6.2.2 cachem_1.1.0 zoo_1.8-12
[116] stringr_1.5.1 KernSmooth_2.23-26 parallel_4.4.1 miniUI_0.1.1.1 vipor_0.4.7
[121] ggrastr_1.0.2 restfulr_0.0.15 pillar_1.10.1 grid_4.4.1 vctrs_0.6.5
[126] RANN_2.6.2 promises_1.3.3 BiocSingular_1.20.0 beachmat_2.20.0 xtable_1.8-4
[131] cluster_2.1.8 beeswarm_0.4.0 evaluate_1.0.4 locfit_1.5-9.10 cli_3.6.5
[136] compiler_4.4.1 Rsamtools_2.20.0 rlang_1.1.6 crayon_1.5.3 future.apply_1.11.3
[141] labeling_0.4.3 plyr_1.8.9 ggbeeswarm_0.7.2 stringi_1.8.4 viridisLite_0.4.2
[146] deldir_2.0-4 BiocParallel_1.38.0 munsell_0.5.1 Biostrings_2.72.1 lazyeval_0.2.2
[151] spatstat.geom_3.3-5 Matrix_1.7-2 RcppHNSW_0.6.0 patchwork_1.3.0 sparseMatrixStats_1.16.0
[156] future_1.49.0 statmod_1.5.0 shiny_1.10.0 ROCR_1.0-11 igraph_2.1.4
[161] bslib_0.9.0 xgboost_1.7.8.1